---
title: "Figure 2"
author: "Yingjie Fan"
date: "2023-06-02"
---

```{r setup, include=FALSE}
# Remove every object from the current environment before the analysis runs.
rm(list = ls())

# Uncomment to install the packages loaded below.
# install.packages(c("dplyr", "data.table", "tidyr", "zoo", "ggplot2", "ggrepel"))
library(dplyr)
library(data.table)
library(tidyr)
library(zoo)
library(ggplot2)
library(ggrepel)

# Root directory of the project; the input and output file locations used in
# this document are built by appending sub-paths to it.
path <- "" # Insert path
```

Data Source:
All tweets were retrieved using the Twitter Academic API as follows:

1. Registered for API Access at https://developer.twitter.com/en/products/twitter-api/academic-research
2. Secured API credentials and used https://github.com/cjbarrie/academictwitteR R package to access Twitter Academic API.
3. Set authorization credentials with the set_bearer() function to allow for storage of bearer token in the .Renviron file.

The exact query and the data-cleaning code are as follows:

```{r Query: Tweets from Twitter API}
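# Requires library(academictwitteR); the cleaning step below also calls map(), presumably from purrr.
# set_bearer() #pass the authorization credentials.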
# tweets <-
#   get_all_tweets(
#     users = c("CGTNOfficial","RT_com","XHNews","AJEnglish","PDChina","BBCBreaking","cnnbrk","ChinaDaily"),
#     start_tweets = "2007-01-01T00:00:00Z",
#     end_tweets = "2020-07-01T00:00:00Z",
#     file = "tweet",
#     data_path = path,
#     n = 10000000,
#   ) #retrieve the data

### Data cleaning
# tweets_cleaned<-tweets %>%
#   mutate(hashtags=as.character(c(map(tweets$entities$hashtags, 3))))%>%
#   mutate(urls=as.character(c(map(tweets$entities$url, 3))))%>%
#   mutate(mentions=as.character(c(map(tweets$entities$mentions, 3))))%>%
#   mutate(retweet=tweets$public_metrics$retweet_count)%>%
#   mutate(reply=tweets$public_metrics$reply_count)%>%
#   mutate(like=tweets$public_metrics$like_count)%>%
#   mutate(quote=tweets$public_metrics$quote_count)%>%
#   mutate(username=dplyr::recode(author_id, `1115874631`='CGTN',`87775422`='China Daily',`303862998`='People\'s Daily',`487118986`='Xinhua', `4970411`='Al Jazeera', `5402612`='BBC Breaking',`428333`='CNN Breaking',`64643056`='RT'))%>%
#   select(conversation_id,lang,id,possibly_sensitive,text,username,author_id,created_at,source,hashtags,urls,mentions,retweet,reply,like,quote)%>%
#   mutate(timestamp = as.Date(created_at))%>%
#     filter(timestamp<"2020-01-24")%>%
#   filter(timestamp>"2013-01-22")
# write.csv(tweets_cleaned,file=paste0(path,"/Tweets/tweets.csv"),fileEncoding = "UTF-8")
```

```{r Generate Intermediate Data}
#The number of tweets per day per account

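# NOTE: `username` and `timestamp` are created in the data-cleaning step (tweets_cleaned), so `tweets` here must be the cleaned data.
# vol_daily_summary = tweets %>%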
#   group_by(username,timestamp) %>%
#   summarise(n=n())

#write.csv(vol_daily_summary,file=paste0(path,"/Data/Tweets/vol_daily_summary.csv"),fileEncoding = "UTF-8") #save the output for analysis
```


```{r Figure 2: The number of tweets per month}
### Figure 2: monthly tweet volume of the four Chinese state-media accounts.
### Reads the per-account daily tweet counts (columns used: username,
### timestamp, n), aggregates them to calendar months, draws one line per
### account and saves the plot under <path>/Output/.

### Read the tweets volume data
vol_d <- fread(paste0(path, "/Data/Tweets/vol_daily_summary.csv")) %>%
  mutate(timestamp = as.Date(timestamp)) %>%
  # Calendar month of each day, stored as a zoo yearmon.
  mutate(month = as.yearmon(timestamp)) %>%
  # Indicator for days after 2017-07-20; it is not used by the figure below.
  mutate(D = ifelse(timestamp > as.Date("2017-07-20"), 1, 0))

# Accounts shown in the figure and the line type drawn for each of them.
# Defined once so the row filter and the linetype scale cannot drift apart.
china_linetypes <- c(
  "CGTN" = "solid",
  "Xinhua" = "dotted",
  "China Daily" = "longdash",
  "People's Daily" = "dotdash"
)

# Month whose data point carries the account-name label (last plotted month).
# Built from a numeric "%Y-%m" string: comparing against "December 2019"
# relies on month-name parsing, which depends on the session locale.
label_month <- as.yearmon("2019-12")

### Generate Figure: the number of tweets per month
vol_china <- vol_d %>%
  filter(timestamp < as.Date("2020-01-01")) %>%
  # Filter before aggregating so only the plotted accounts are summed.
  filter(username %in% names(china_linetypes)) %>%
  group_by(username, month) %>%
  # Drop the grouping: nothing downstream needs it, and this avoids the
  # "summarise() has grouped output" message.
  dplyr::summarize(n = sum(n), .groups = "drop") %>%
  # Empty labels everywhere except the labelled month; ggrepel still treats
  # the unlabelled points as obstacles when placing the text.
  mutate(
    end_label = ifelse(month == label_month, as.character(username), "")
  ) %>%
  # The original passed size = 0.5 to ggplot(), where it is ignored; it is
  # omitted here and the geom_line() default width is kept.
  ggplot(aes(x = as.Date(month), y = n, linetype = username)) +
  theme_bw() +
  geom_line() +
  # Extend the x axis past the data so the labels fit to the right of the lines.
  expand_limits(x = as.Date("2020-12-01")) +
  scale_x_date(date_breaks = "year", date_labels = "%Y") +
  geom_text_repel(
    aes(label = end_label),
    hjust = -0.3,
    vjust = 0.3,
    direction = "y",
    segment.color = "grey80"
  ) +
  labs(y = "Number of Tweets per Month", x = "", title = "") +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 12)
  ) +
  scale_linetype_manual(values = china_linetypes)

### Save Figure 2
ggsave(
  paste0(path, "/Output/figure2_volume.png"),
  vol_china,
  width = 7,
  height = 5
)
```

